scenario-notebooks/Tools/PerfTools_Log Analytics Query.ipynb (350 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "source": [ "# Performance Tools - Azure Log Analytics Query\n", "\n", "__Notebook Version:__ 1.0<br>\n", "__Python Version:__ Python 3.8<br>\n", "__Apache Spark Version:__ 3.1<br>\n", "__Required Packages:__ azure-monitor-query, azure-mgmt-loganalytics<br>\n", "__Platforms Supported:__ Azure Synapse Analytics\n", " \n", "__Data Source Required:__ Log Analytics custom table defined\n", " \n", "### Description\n", "This notebook measures query performance against Azure Log Analytics using its data query API.<br>\n", "*** Please run the cells sequentially to avoid errors. Please do not use \"run all cells\". *** <br>\n", "Need to know more about KQL? [Getting started with Kusto Query Language](https://docs.microsoft.com/azure/data-explorer/kusto/concepts/).\n", "\n", "## Table of Contents\n", "1. Warm-up\n", "2. Azure Log Analytics Data Queries\n", "3. Save result to ADX" ], "metadata": {} }, { "cell_type": "markdown", "source": [ "## 1. Warm-up" ], "metadata": {} }, { "cell_type": "code", "source": [ "# Load Python libraries that will be used in this notebook\r\n", "from azure.mgmt.resource import ResourceManagementClient\r\n", "from azure.mgmt.kusto import KustoManagementClient\r\n", "from azure.kusto.data.exceptions import KustoServiceError\r\n", "from azure.kusto.data.helpers import dataframe_from_result_table\r\n", "from azure.kusto.data import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties\r\n", "\r\n", "from azure.kusto.ingest import (\r\n", " BlobDescriptor,\r\n", " FileDescriptor,\r\n", " IngestionProperties,\r\n", " IngestionStatus,\r\n", " KustoStreamingIngestClient,\r\n", " ManagedStreamingIngestClient,\r\n", " QueuedIngestClient,\r\n", " StreamDescriptor,\r\n", ")\r\n", "\r\n", "from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential\r\n", "from azure.core.exceptions import HttpResponseError \r\n", "from azure.mgmt.loganalytics import LogAnalyticsManagementClient\r\n", "from azure.monitor.query import LogsQueryClient, MetricsQueryClient, LogsQueryStatus\r\n", "\r\n", "import sys\r\n", "from datetime import datetime, timezone, timedelta\r\n", "import requests\r\n", "import pandas as pd\r\n", "import numpy\r\n", "import json\r\n", "import math\r\n", "import ipywidgets\r\n", "from IPython.display import display, HTML, Markdown" ], "outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "tenant_id = \"\"\r\n", "subscription_id = \"\"\r\n", "\r\n", "akv_name = \"\"\r\n", "client_id_name = \"\"\r\n", "client_secret_name = \"\"\r\n", "akv_link_name = \"\"" ], "outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# User input for Log Analytics workspace as the data source for querying\r\n", "subscription_id_source = \"\"\r\n", "resource_group_name_source = \"\"\r\n", "workspace_name_source = \"\"\r\n", "workspace_id_source = \"\"\r\n", "workspace_resource_id_source = \"/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.OperationalInsights/workspaces/{2}\".format(subscription_id_source, resource_group_name_source, workspace_name_source)\r\n" ], "outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# You may need to change resource_uri for various cloud environments.\r\n", "resource_uri = \"https://api.loganalytics.io\"\r\n", "client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name)\r\n", "client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)\r\n", "\r\n", "credential = ClientSecretCredential(\r\n", " tenant_id=tenant_id, \r\n", " client_id=client_id, \r\n", " client_secret=client_secret)\r\n", "access_token = credential.get_token(resource_uri + \"/.default\")\r\n", "token = access_token[0]" ], "outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 2. Azure Log Analytics Data Queries" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "# Functions for query\r\n", "def query_la(workspace_id_query, query):\r\n", " la_data_client = LogsQueryClient(credential=credential)\r\n", " end_time = datetime.now(timezone.utc)\r\n", " start_time = end_time - timedelta(7)\r\n", "\r\n", " print(query)\r\n", " print('starting ' + str(end_time.timestamp()))\r\n", " la_data_client.query_workspace(\r\n", " workspace_id=workspace_id_query,\r\n", " query=query,\r\n", " timespan=(start_time, end_time))\r\n", " final_time = datetime.now(timezone.utc)\r\n", " print('ending ' + str(final_time.timestamp()))\r\n", " return (final_time - end_time).total_seconds()\r\n", "\r\n", "\r\n", "def slice_query_la(query, lookback_start, lookback_end='0', lookback_unit='h', query_row_limit=400000, split_factor=2):\r\n", " \"Slice the time to render records <= 500K\"\r\n", " count_query = query.format(lookback_start, lookback_unit, lookback_end)\r\n", " count = ' | summarize count()'\r\n", " count_query = count_query + count\r\n", " df_count = query_la(workspace_id_source, count_query)\r\n", " row_count = df_count['count_'][0]\r\n", " print(count_query)\r\n", " print(row_count)\r\n", " df_final = pd.DataFrame()\r\n", "\r\n", " if row_count > query_row_limit:\r\n", " number_of_divide = 0\r\n", " while row_count > query_row_limit:\r\n", " row_count = row_count / split_factor\r\n", " number_of_divide = number_of_divide + 1\r\n", "\r\n", " factor = split_factor ** number_of_divide\r\n", " step_number = math.ceil(int(lookback_start) / factor)\r\n", " if factor > int(lookback_start) and lookback_unit == 'h':\r\n", " lookback_unit = 'm'\r\n", " number_of_minutes = 60\r\n", " step_number = math.ceil(int(lookback_start)*number_of_minutes / factor)\r\n", "\r\n", " try:\r\n", " for i in range(int(lookback_end), factor + 1, 1):\r\n", " if i > 0:\r\n", " df_la_query = pd.DataFrame\r\n", " current_query = query.format(i * step_number, lookback_unit, (i - 1) * step_number)\r\n", " print(current_query)\r\n", " df_la_query = query_la(workspace_id_source, current_query)\r\n", " print(df_la_query.shape[0])\r\n", " df_final = pd.concat([df_final, df_la_query])\r\n", " except:\r\n", " print(\"query failed\")\r\n", " raise\r\n", " else:\r\n", " df_final = query_la(workspace_id_source, query.format(lookback_start, lookback_unit, lookback_end))\r\n", "\r\n", " return df_final" ], "outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "### Simple Data Query" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "lookback_period = '24'\r\n", "lookback_unit = 'h'\r\n", "number_of_item = '1000'\r\n", "query = \"CommonSecurityLog | where TimeGenerated >= ago({0}{1}) | take {2}\".format(lookback_period, lookback_unit, number_of_item)\r\n", "\r\n", "execution_seconds = query_la(workspace_id_source, query)\r\n" ], "outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 3. Save result to ADX" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "code", "source": [ "cluster = \"https://ingest-nbauto.eastus.kusto.windows.net\"\r\n", "kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(cluster, client_id, client_secret, tenant_id)\r\n", "kusto_client = QueuedIngestClient(kcsb)\r\n", "\r\n", "ingestion_props = IngestionProperties(\r\n", " database=\"nba1\",\r\n", " table=\"usxperf\",\r\n", ")\r\n", "\r\n", "fields = [\"TimeGenerated\", \"Service\", \"Query\", \"TimeInSeconds\"]\r\n", "rows = [[datetime.now(timezone.utc).strftime(\"%Y-%m-%dT%H:%M:%S.%fZ\"), \"Log Analytics Draft\", query, execution_seconds]]\r\n", "df = pd.DataFrame(data=rows, columns=fields)\r\n", "kusto_client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)" ], "outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } } } ], "metadata": { "kernelspec": { "name": "synapse_pyspark", "display_name": "Synapse PySpark" }, "language_info": { "name": "python" }, "description": null, "save_output": true, "synapse_widget": { "version": "0.1", "state": {} } }, "nbformat": 4, "nbformat_minor": 2 }